Here, we will cluster the cells of the patient with id “365”.
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- here::here("results/R_objects/4.seurat_leukemic.rds")
path_to_save <- here::here("results/R_objects/5.seurat_clustered_365.rds")
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "mediumorchid2", "coral2", "blueviolet",
"indianred4", "deepskyblue1", "dimgray", "deeppink1",
"green", "lightgray", "hotpink1")
# Source functions
source(here::here("bin/utils.R"))
# Thresholds
seurat <- readRDS(path_to_obj)
Of note, everytime that we filter out a specific population, the set of highly variable genes (HVG) and the axis of variability (PCs) changes. Thus, we need to rerun the main analysis steps iteratively.
First, we focus on the patient studied in this notebook:
seurat <- subset(seurat, donor_id == "365")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")
As we can see, there is a large batch effect between the two experiments we conducted BCLLATLAS_10 and BCLLATLAS_29. However, contrary to the previous cases, here BCLLATLAS_29 contains all time-points and has a better quality than BCLLATLAS_10. Thus, in this case we will focus first on BCLLATLAS_29 (not hashed) and later in BCLLATLAS_10 (hashed).
seurat <- subset(seurat, subproject == "BCLLATLAS_29")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat, group.by = "subproject")
FeaturePlot(seurat, "pct_mt") +
scale_color_viridis_c(option = "magma")
FeaturePlot(seurat, "nFeature_RNA") +
scale_color_viridis_c(option = "magma")
We see a subpopulation with characteristics of low-quality cells: high mitochondrial expression and low number of detected features (genes). Hence, let us exclude it from the dataset:
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5134
## Number of edges: 172844
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8371
## Number of communities: 5
## Elapsed time: 1 seconds
DimPlot(seurat)
seurat <- subset(seurat, seurat_clusters != "2")
seurat <- process_seurat(seurat, dims = 1:20)
DimPlot(seurat)
DimPlot(seurat, group.by = "time_point")
FeaturePlot(seurat, features = "NKG7")
Despite our efforts, we still have some cells from the microenvironment:
# microenv_barcodes <- CellSelector(FeaturePlot(seurat, features = "NKG7"))
microenv_barcodes <- c(
"s6dm1ahu_e8w2ucdi_AAGTGAACAGGTCCGT-1",
"s6dm1ahu_e8w2ucdi_CGGGACTCACCTTCGT-1",
"s6dm1ahu_e8w2ucdi_GAAGGGTGTAGTGATA-1",
"s6dm1ahu_e8w2ucdi_GATCCCTAGGTCCCGT-1",
"s6dm1ahu_e8w2ucdi_TACAACGGTCCTGAAT-1",
"s6dm1ahu_e8w2ucdi_TACCCGTAGTAGTCAA-1"
)
seurat$is_microenv <- ifelse(
colnames(seurat) %in% microenv_barcodes,
TRUE,
FALSE
)
Idents(seurat) <- "is_microenv"
markers_microenv <- FindMarkers(
seurat,
ident.1 = TRUE,
ident.2 = FALSE,
only.pos = TRUE,
logfc.threshold = 0.75
)
DT::datatable(markers_microenv)
Filter out:
seurat <- subset(seurat, is_microenv == FALSE)
seurat <- FindNeighbors(seurat, reduction = "pca", dims = 1:20)
seurat <- FindClusters(seurat, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4685
## Number of edges: 158004
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9294
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat)
# Find subclusters Richter
seurat <- FindSubCluster(
seurat,
cluster = "1",
subcluster.name = "richter_subclusters",
graph.name = "RNA_snn",
resolution = 0.3
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 562
## Number of edges: 19955
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8310
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "richter_subclusters")
# Find subclusters CLL
Idents(seurat) <- "richter_subclusters"
seurat <- FindSubCluster(
seurat,
cluster = "0",
subcluster.name = "cll_subclusters",
graph.name = "RNA_snn",
resolution = 0.2
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4123
## Number of edges: 137942
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8221
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "cll_subclusters")
Idents(seurat) <- "cll_subclusters"
markers_0 <- FindMarkers(
seurat,
ident.1 = "0_1",
ident.2 = "0_0",
only.pos = FALSE
)
markers_0 <- markers_0 %>%
rownames_to_column("gene") %>%
arrange(desc(avg_log2FC))
DT::datatable(markers_0)
Importantly we see that in the CLL cluster the clusters are not very robust, since the fold-changes are very small. Thus, we will consider it as a single cluster:
seurat$final_clusters <- seurat$richter_subclusters
Idents(seurat) <- "final_clusters"
DimPlot(seurat)
saveRDS(seurat, path_to_save)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.6 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 ggplot2_3.3.3 tidyverse_1.3.1 harmony_1.0 Rcpp_1.0.6 SeuratWrappers_0.3.0 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.4 crosstalk_1.1.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.5.0 magrittr_2.0.1 tensor_1.5 cluster_2.1.1 ROCR_1.0-11 limma_3.46.0 remotes_2.4.0 globals_0.14.0 modelr_0.1.8 matrixStats_0.59.0 spatstat.sparse_2.0-0 colorspace_2.0-1 rvest_1.0.0 ggrepel_0.9.1 haven_2.4.1 xfun_0.23 crayon_1.4.1 jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-10 zoo_1.8-9 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.8 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1 viridisLite_0.4.0 xtable_1.8-4 reticulate_1.20 spatstat.core_2.1-2 rsvd_1.0.5 DT_0.18 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [55] sass_0.4.0 uwot_0.1.10 dbplyr_2.1.1 deldir_0.2-10 here_1.0.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.1 rlang_0.4.11 reshape2_1.4.4 later_1.2.0 munsell_0.5.0 cellranger_1.1.0 tools_4.0.4 cli_2.5.0 generics_0.1.0 broom_0.7.7 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.33 fs_1.5.0 fitdistrplus_1.1-5 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-152 mime_0.10 xml2_1.3.2 compiler_4.0.4 rstudioapi_0.13 plotly_4.9.4 png_0.1-7 spatstat.utils_2.2-0 reprex_2.0.0 bslib_0.2.5.1 stringi_1.6.2 highr_0.9 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.3-4 vctrs_0.3.8 pillar_1.6.1 lifecycle_1.0.0 BiocManager_1.30.15 spatstat.geom_2.1-0 lmtest_0.9-38 jquerylib_0.1.4 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3
## [109] httpuv_1.6.1 patchwork_1.1.1 R6_2.5.0 bookdown_0.22 promises_1.2.0.1 KernSmooth_2.23-18 gridExtra_2.3 parallelly_1.26.0 codetools_0.2-18 MASS_7.3-53.1 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2 sctransform_0.3.2 mgcv_1.8-36 parallel_4.0.4 hms_1.1.0 grid_4.0.4 rpart_4.1-15 rmarkdown_2.8 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.10